## master preamble ------------------------------------------------------
rm(list=ls(all=TRUE)) 

## inherit directories from stata -----------
args = commandArgs(trailingOnly = "TRUE")
if (length(args)) {
  dir.proj = paste0(args[1],'/')
  dir.lib  = args[2]
} 

## if loading from library -------------------
if(dir.lib!='') {
  ## load depenencies --------------------
  library(R.utils,lib.loc=dir.lib)
  library(ggplot2,lib.loc=dir.lib)
  library(tidyr,lib.loc=dir.lib)
  library(stringr,lib.loc=dir.lib)
  library(forcats,lib.loc=dir.lib)
  library(lubridate,lib.loc=dir.lib)
  library(haven,lib.loc=dir.lib)
  library(data.table,lib.loc=dir.lib)
  ## load main packages ------------------
  library(rio,lib.loc=dir.lib)
  library(dplyr,lib.loc=dir.lib)
  library(tidyverse,lib.loc=dir.lib)
  library(fixest,lib.loc=dir.lib)
## otherwise: simple load -----------------
} else {
  library(rio)
  library(dplyr)
  library(tidyverse)
  library(fixest)
}
## --------------------------------------------------------------------

## file preamble ------------------------------------------------------
## randomization seed --------
set.seed(413)

## directories --------
dir.data = paste0(dir.proj,'data/out/')
dir.est  = paste0(dir.proj,'estimates/')
dir.temp = paste0(dir.proj,'_temp/')

## file-specific -------
name.Y   = 'cite_ny1'
name.out = 'recid_bins'
bootrep  = 250

## Functions ----------------
rudirichlet <- function(n, d) {
  rexp.mat <- matrix( rexp(d * n, 1) , nrow = n, ncol = d)
  dirichlet.weights <- rexp.mat / rowSums(rexp.mat)
  dirichlet.weights }
## ---------------------------------------------------------------------


## Setup data -------------------
data = import(paste0(dir.data,'4-main.dta'))
data = mutate(data,Y=data[[name.Y]],D=harsh,W=covbin1,jid=officerid) %>%
  select(citationid,Y,D,Z,W,totfe,jid)

## Setup for clustered bootstrap -------------------
list    = unique(select(data,jid))
weights = rudirichlet(nrow(list),bootrep)*10


## Looping -----------
for(j in 0:bootrep) {
  
  ## Setup bootstrap data --------
  if(j==0) {
    boot.list = list %>% mutate(bootwt = 1)
    boot.data = inner_join(data,boot.list,by='jid') }
  if(j>=1) {
    boot.list = list %>% mutate(bootwt = weights[,j])
    boot.data = inner_join(data,boot.list,by='jid') }
  
  ## Get sample means ------------
  y = weighted.mean(boot.data$Y,boot.data$bootwt)
  p = weighted.mean(boot.data$D,boot.data$bootwt)
  
  ## Loop over binwidth------
  b.list = c(0.5,0.2,0.1,0.05,0.01)
  for(b in b.list) {
    
    width = b
    nbin  = 1/b
    boot.data = mutate(
      boot.data,
      bin = as.numeric(as.factor(cut(Z,seq(0,1,width),right=F,include.lowest=T))))
    
    for(i in 1:max(boot.data$bin)) {
      vname = paste('bin_',i,sep='')
      boot.data[vname] <- as.numeric(boot.data$bin>= i) }
    boot.data = boot.data %>% mutate(across(starts_with('bin_'),
                                  function(x) (x*Z),
                                  .names = "Z_{.col}"))
    
    ## Spline fit -----------
    fit = fixest::feols(
      as.formula(glue::glue('Y~',paste(paste0('Z_bin_',seq(1:nbin)),collapse='+'),'|totfe')),
      boot.data,weights=~bootwt)
    
    ## Compute pars ----
    fe = stats::predict(fit,newdata=boot.data,fixef=T)
    y0 = mean(fe$totfe)
    y1 = mean(fe$totfe)+sum(fit$coefficients)
    
    ## Other pars -------
    fit = feols(Y~D,boot.data,weights=~bootwt)
    y0d0 = as.numeric(fit$coefficients[1])
    y1d1 = as.numeric(fit$coefficients[1]+fit$coefficients[2])
    
    ## Post output -----
    est = data.frame(
      par=c('Y','p','Y0','Y1','ATE','ATT','ATU',
            'Y1D1','Y1D0','Y0D0','Y0D1','Y0D1mY0D0','ATTmATU'),
      est=c(y,p,y0,y1,(y1-y0),(y-y0)/p,(y1-y)/(1-p),
            y1d1,(1/(1-p))*y1-(p/(1-p))*y1d1,
            y0d0,(1/p)*y0-((1-p)/p)*y0d0,
            ((1/p)*y0-((1-p)/p)*y0d0)-y0d0,
            ((y-y0)/p)-(y1-y)/(1-p))) %>% mutate(iter=j,bw=b,type='linear')
    if(j==0 & b==b.list[1]) {
      boot.out = est }
    else { boot.out = bind_rows(boot.out,est) }

  
    ## Mean fit -----------
    fit = fixest::feols(
      as.formula(glue::glue('Y~i(bin,ref=1)|totfe')),
      boot.data,weights=~bootwt)
  
    ## Compute pars ----
    fe = stats::predict(fit,newdata=boot.data,fixef=T)
    y0 = mean(fe$totfe)
    y1 = y0 + fit$coefficients[length(fit$coefficients)]
  
    ## Other pars -------
    fit = feols(Y~D,boot.data,weights=~bootwt)
    y0d0 = as.numeric(fit$coefficients[1])
    y1d1 = as.numeric(fit$coefficients[1]+fit$coefficients[2])
  
    ## Post output -----
    est = data.frame(
      par=c('Y','p','Y0','Y1','ATE','ATT','ATU',
          'Y1D1','Y1D0','Y0D0','Y0D1','Y0D1mY0D0','ATTmATU'),
      est=c(y,p,y0,y1,(y1-y0),(y-y0)/p,(y1-y)/(1-p),
          y1d1,(1/(1-p))*y1-(p/(1-p))*y1d1,
          y0d0,(1/p)*y0-((1-p)/p)*y0d0,
          ((1/p)*y0-((1-p)/p)*y0d0)-y0d0,
          ((y-y0)/p)-(y1-y)/(1-p))) %>% mutate(iter=j,bw=b,type='mean')
    boot.out = bind_rows(boot.out,est)
    
    ## Clear out dataset ------
    boot.data = select(boot.data,-c(starts_with('bin'),starts_with('Z_bin_')))
    
  }
  
  ## Progress report -----
  print(paste('Iteration',j,'of',bootrep,'Completed'))
  
}
## End of bootstrap -------------------------


## Bootstrap output -----------
est.main = boot.out %>% filter(iter==0) %>% select(bw,type,par,est)
est.boot = boot.out %>% filter(iter>=1) %>%
  group_by(bw,type,par) %>% summarize(se=sd(est),lq=quantile(est,0.05),uq=quantile(est,0.95))
est.out  = inner_join(est.main,est.boot) %>% mutate(lb=est-1.96*se,ub=est+1.96*se)
export(est.out,paste0(dir.est,name.out,'.csv'))


